require 'html/xmltree'
require 'extract/extractor'

module Extract
  
  # Extractor variant that runs its input through an HTMLTree::XMLParser and
  # works on the root of the document that parser builds.
  class HTMLExtractor < Extractor
    # Passes +opt+ on to Extractor#initialize and creates the parser instance
    # that is reused for every document.
    #
    # NOTE(review): the positional (false, true) arguments are defined by
    # HTMLTree::XMLParser; their meaning is not visible here -- confirm
    # against that library before changing them.
    def initialize(opt = {})
      super(opt)
      @parser = HTMLTree::XMLParser.new(false, true)
    end

    protected

    # Resets the shared parser, feeds it +doc+ and returns the root of the
    # resulting document.
    #
    # A REXML::ParseException raised while resetting/feeding is tolerated as
    # long as the parser still exposes a document root (best-effort handling
    # of a partially parsed document); when no root is available the same
    # exception is re-raised.
    def open_doc(doc)
      begin
        @parser.reset
        @parser.feed(doc)
      rescue REXML::ParseException => parse_error
        # Keep going with whatever was parsed so far, if anything.
        partial_root = @parser.document.root
        raise parse_error if partial_root.nil?
      end
      @parser.document.root
    end
  end
  
end